TODO
s_1ha_qres <- qresid(s_1ha)
par(mfrow = c(2,2))
#histogram
plot(density(s_1ha_qres))
#QQ plot
qqnorm(s_1ha_qres); qqline(s_1ha_qres)
#Fitted vs. residuals--binned residuals plot
binnedplot(fitted(s_1ha), residuals(s_1ha, type = "response"))
k.check(s_1ha)
## k' edf k-index p-value
## s(log_size_prev) 9 2.842177e+00 0.9584417 0.935
## s(plot) 4 1.627924e-04 NA NA
## s(spei_history,L) 90 1.077927e+01 NA NA
Basis dimension checking with gam.check() doesn’t appear to work for dlnm crossbasis smooths. Instead I’ll use a method described in the help file ?choose.k to check for adequate knots. Unfortunately, bs = "cs" doesn’t work with dlnm, so I’ll use select = TRUE instead to reduce chance of overfitting.
check_res_edf(s_1ha)
## # A tibble: 1 x 2
## smooth edf
## <chr> <dbl>
## 1 te(spei_history,L) 0
The crossbasis smooth possibly needs more knots.
# plot_lag_slice(s_1ha, "spei_history", lag = 0) + labs(title = "surv, 1ha")
Here you can see how the CIs hardly overlap 0, but it could be an artifact of the line not being allowed to be wiggly enough.
summary(s_1ha)
##
## Family: binomial
## Link function: logit
##
## Formula:
## surv ~ flwr_prev + s(log_size_prev, bs = "cr", k = k[1]) + s(plot,
## bs = "re") + s(spei_history, L, bs = "cb", k = k[2:3], xt = list(bs = "cr"))
##
## Parametric coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 3.22347 0.09128 35.312 <2e-16 ***
## flwr_prev1 -0.56668 0.44209 -1.282 0.2
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Approximate significance of smooth terms:
## edf Ref.df Chi.sq p-value
## s(log_size_prev) 2.842e+00 9 360.41 <2e-16 ***
## s(plot) 1.628e-04 3 0.00 0.481
## s(spei_history,L) 1.078e+01 23 73.09 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## R-sq.(adj) = 0.0626 Deviance explained = 13.8%
## fREML = 12454 Scale est. = 1 n = 9183
draw(s_1ha)
## Warning: Removed 910 rows containing non-finite values (stat_contour).
s_cf_qres <- qresid(s_cf)
par(mfrow = c(2,2))
#histogram
plot(density(s_cf_qres))
#QQ plot
qqnorm(s_cf_qres); qqline(s_cf_qres)
#Fitted vs. residuals--binned residuals plot
binnedplot(fitted(s_cf), residuals(s_cf, type = "response"))
k.check(s_cf)
## k' edf k-index p-value
## s(log_size_prev) 9 3.457925 0.9099995 0.1925
## s(plot) 6 4.336851 NA NA
## s(spei_history,L) 210 12.795976 NA NA
# looking for near zero edf
check_res_edf(s_cf)
## # A tibble: 1 x 2
## smooth edf
## <chr> <dbl>
## 1 te(spei_history,L) 1.1
summary(s_cf)
##
## Family: binomial
## Link function: logit
##
## Formula:
## surv ~ flwr_prev + s(log_size_prev, bs = "cr", k = k[1]) + s(plot,
## bs = "re") + s(spei_history, L, bs = "cb", k = k[2:3], xt = list(bs = "cr"))
##
## Parametric coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 3.4656 0.1277 27.147 <2e-16 ***
## flwr_prev1 0.1002 0.2685 0.373 0.709
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Approximate significance of smooth terms:
## edf Ref.df Chi.sq p-value
## s(log_size_prev) 3.458 9 1957.82 <2e-16 ***
## s(plot) 4.337 5 37.83 <2e-16 ***
## s(spei_history,L) 12.796 21 186.40 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## R-sq.(adj) = 0.113 Deviance explained = 20%
## fREML = 43577 Scale est. = 1 n = 31701
draw(s_cf)
## Warning: Removed 939 rows containing non-finite values (stat_contour).
Here I use a random sub-sample to check that differences between continuous forest and fragments are not purely due to sample size differences, particularly differences in the complexity of the crossbasis smooth.
summary(s_1ha)$edf[3]
## [1] 10.77927
summary(s_cf)$edf[3]
## [1] 12.79598
summary(s_cf_sub)$edf[3]
## [1] 1.886167
draw(s_cf_sub)
## Warning: Removed 910 rows containing non-finite values (stat_contour).
Surface still looks different in a similar way though.
appraise(g_1ha)
k.check(g_1ha)
## k' edf k-index p-value
## s(log_size_prev) 9 4.193955 0.9818493 0.1025
## s(plot) 4 2.826249 NA NA
## s(spei_history,L) 60 18.181547 NA NA
check_res_edf(g_1ha)
## # A tibble: 1 x 2
## smooth edf
## <chr> <dbl>
## 1 te(spei_history,L) 0
summary(g_1ha)
##
## Family: Scaled t(4.729,0.477)
## Link function: identity
##
## Formula:
## log_size ~ flwr_prev + s(log_size_prev, bs = "cr", k = k[1]) +
## s(plot, bs = "re") + s(spei_history, L, bs = "cb", k = k[2:3],
## xt = list(bs = "cr"))
##
## Parametric coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 4.17955 0.08340 50.117 <2e-16 ***
## flwr_prev1 0.08603 0.03601 2.389 0.0169 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Approximate significance of smooth terms:
## edf Ref.df F p-value
## s(log_size_prev) 4.194 9 3006.22 <2e-16 ***
## s(plot) 2.826 3 23.03 <2e-16 ***
## s(spei_history,L) 18.182 21 27.04 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## R-sq.(adj) = 0.696 Deviance explained = 63.2%
## fREML = 12189 Scale est. = 1 n = 8527
draw(g_1ha)
## Warning: Removed 910 rows containing non-finite values (stat_contour).
gratia::appraise(g_cf)
k.check(g_cf)
## k' edf k-index p-value
## s(log_size_prev) 24 9.387008 0.967252 0.015
## s(plot) 6 4.080451 NA NA
## s(spei_history,L) 60 15.144504 NA NA
check_res_edf(g_cf)
## # A tibble: 1 x 2
## smooth edf
## <chr> <dbl>
## 1 te(spei_history,L) 0
# plot_lag_slice(g_1ha, "spei_history", lag = 33)
hmm… CIs are super narrow here, but effect size is tiny
summary(g_cf)
##
## Family: Scaled t(3.852,0.414)
## Link function: identity
##
## Formula:
## log_size ~ flwr_prev + s(log_size_prev, bs = "cr", k = k[1]) +
## s(plot, bs = "re") + s(spei_history, L, bs = "cb", k = k[2:3],
## xt = list(bs = "cr"))
##
## Parametric coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 4.36045 0.03401 128.214 <2e-16 ***
## flwr_prev1 0.02856 0.01476 1.934 0.0531 .
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Approximate significance of smooth terms:
## edf Ref.df F p-value
## s(log_size_prev) 9.387 24 6717.103 <2e-16 ***
## s(plot) 4.080 5 7.035 <2e-16 ***
## s(spei_history,L) 15.145 21 91.033 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## R-sq.(adj) = 0.785 Deviance explained = 69.3%
## fREML = 41001 Scale est. = 1 n = 28849
draw(g_cf)
## Warning: Removed 968 rows containing non-finite values (stat_contour).
To check that differences are not purely due to sample size differences, particularly that lower edf in continuous forests is due to higher sample size.
summary(g_1ha)$edf[3]
## [1] 18.18155
summary(g_cf)$edf[3]
## [1] 15.1445
summary(g_cf_sub)$edf[3]
## [1] 16.35041
edf of subsample is similar to full dataset, despite differences in sample size.
draw(g_cf_sub)
## Warning: Removed 910 rows containing non-finite values (stat_contour).
Crossbasis surface looks nearly identical.
f_1ha_qres <- qresid(f_1ha)
par(mfrow = c(2,2))
#histogram
plot(density(f_1ha_qres))
#QQ plot
qqnorm(f_1ha_qres); qqline(f_1ha_qres)
#Fitted vs. residuals--binned residuals plot
binnedplot(fitted(f_1ha), residuals(f_1ha, type = "response"))
k.check(f_1ha)
## k' edf k-index p-value
## s(log_size_prev) 9 3.236234 0.9904763 0.9325
## s(plot) 4 2.424034 NA NA
## s(spei_history,L) 162 13.965739 NA NA
## s(ha_id_number) 1266 68.302965 NA NA
# looking for near zero edf
check_res_edf(f_1ha)
## # A tibble: 1 x 2
## smooth edf
## <chr> <dbl>
## 1 te(spei_history,L) 0
summary(f_1ha)
##
## Family: binomial
## Link function: logit
##
## Formula:
## flwr ~ flwr_prev + s(log_size_prev, bs = "cr", k = k[1]) + s(plot,
## bs = "re") + s(spei_history, L, bs = "cb", k = k[2:3], xt = list(bs = "cr")) +
## s(ha_id_number, bs = "re")
##
## Parametric coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -6.2436 0.5149 -12.126 < 2e-16 ***
## flwr_prev1 0.6671 0.1758 3.794 0.000148 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Approximate significance of smooth terms:
## edf Ref.df Chi.sq p-value
## s(log_size_prev) 3.236 9 357.04 < 2e-16 ***
## s(plot) 2.424 3 19.44 0.000552 ***
## s(spei_history,L) 13.966 28 116.76 < 2e-16 ***
## s(ha_id_number) 68.303 1265 87.08 0.000833 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## R-sq.(adj) = 0.304 Deviance explained = 45.7%
## fREML = 10687 Scale est. = 1 n = 8527
draw(f_1ha)
## Warning: Removed 910 rows containing non-finite values (stat_contour).
f_cf_qres <- qresid(f_cf)
par(mfrow = c(2,2))
#histogram
plot(density(f_cf_qres))
#QQ plot
qqnorm(f_cf_qres); qqline(f_cf_qres)
#Fitted vs. residuals--binned residuals plot
binnedplot(fitted(f_cf), residuals(f_cf, type = "response"))
k.check(f_cf)
## k' edf k-index p-value
## s(log_size_prev) 9 5.795259 0.9746386 0.71
## s(plot) 6 3.950101 NA NA
## s(spei_history,L) 210 11.451012 NA NA
# looking for near zero edf
check_res_edf(f_cf)
## # A tibble: 1 x 2
## smooth edf
## <chr> <dbl>
## 1 te(spei_history,L) 1.2
summary(f_cf)
##
## Family: binomial
## Link function: logit
##
## Formula:
## flwr ~ flwr_prev + s(log_size_prev, bs = "cr", k = k[1]) + s(plot,
## bs = "re") + s(spei_history, L, bs = "cb", k = k[2:3], xt = list(bs = "cr"))
##
## Parametric coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -5.07388 0.22648 -22.40 <2e-16 ***
## flwr_prev1 0.96103 0.08378 11.47 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Approximate significance of smooth terms:
## edf Ref.df Chi.sq p-value
## s(log_size_prev) 5.795 9 1748.08 <2e-16 ***
## s(plot) 3.950 5 47.55 <2e-16 ***
## s(spei_history,L) 11.451 21 415.26 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## R-sq.(adj) = 0.188 Deviance explained = 32.7%
## fREML = 41028 Scale est. = 1 n = 28849
draw(f_cf)
## Warning: Removed 968 rows containing non-finite values (stat_contour).
To check that differences are not purely due to sample size differences, particularly that lower edf in continuous forests is due to higher sample size. For flowering, edf is actually slightly higher in CF with a larger sample size. With subsample, edf are more similar.
summary(f_1ha)$edf[3]
## [1] 13.96574
summary(f_cf)$edf[3]
## [1] 11.45101
summary(f_cf_sub)$edf[3]
## [1] 9.883384
draw(f_cf_sub)
## Warning: Removed 910 rows containing non-finite values (stat_contour).
Crossbasis surface looks extremely similar.
Reproducibility receipt
## datetime
Sys.time()
## [1] "2021-08-20 14:53:17 EDT"
## repository
if(requireNamespace('git2r', quietly = TRUE)) {
git2r::repository()
} else {
c(
system2("git", args = c("log", "--name-status", "-1"), stdout = TRUE),
system2("git", args = c("remote", "-v"), stdout = TRUE)
)
}
## Local: revisions /Users/scottericr/Documents/HeliconiaDemography
## Remote: revisions @ origin (https://github.com/BrunaLab/HeliconiaDemography.git)
## Head: [581a757] 2021-08-18: explain why no IPM (closes #81)
## session info
sessionInfo()
## R version 4.0.2 (2020-06-22)
## Platform: x86_64-apple-darwin17.0 (64-bit)
## Running under: macOS 10.16
##
## Matrix products: default
## BLAS: /Library/Frameworks/R.framework/Versions/4.0/Resources/lib/libRblas.dylib
## LAPACK: /Library/Frameworks/R.framework/Versions/4.0/Resources/lib/libRlapack.dylib
##
## locale:
## [1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8
##
## attached base packages:
## [1] stats4 parallel stats graphics grDevices datasets utils
## [8] methods base
##
## other attached packages:
## [1] arm_1.11-2 lme4_1.1-27.1 Matrix_1.3-3 MASS_7.3-54
## [5] qqplotr_0.0.5 Hmisc_4.5-0 Formula_1.2-4 survival_3.2-11
## [9] lattice_0.20-44 readxl_1.3.1 colorspace_2.0-1 rmarkdown_2.7
## [13] statmod_1.4.36 latex2exp_0.5.0 gratia_0.6.0.9112 broom_0.7.6
## [17] patchwork_1.1.1 glue_1.4.2 bbmle_1.0.23.1 dlnm_2.4.5
## [21] mgcv_1.8-36 nlme_3.1-152 lubridate_1.7.10 janitor_2.1.0
## [25] tsModel_0.6 SPEI_1.7 lmomco_2.3.6 tsibble_1.0.1
## [29] forcats_0.5.1 stringr_1.4.0 dplyr_1.0.5 purrr_0.3.4
## [33] readr_1.4.0 tidyr_1.1.3 tibble_3.1.1 ggplot2_3.3.5
## [37] tidyverse_1.3.1 here_1.0.1 tarchetypes_0.2.0 targets_0.4.2
## [41] dotenv_1.0.3 conflicted_1.0.4
##
## loaded via a namespace (and not attached):
## [1] backports_1.2.1 igraph_1.2.6 splines_4.0.2
## [4] digest_0.6.27 htmltools_0.5.1.1 fansi_0.4.2
## [7] magrittr_2.0.1 checkmate_2.0.0 memoise_2.0.0
## [10] cluster_2.1.2 modelr_0.1.8 bdsmatrix_1.3-4
## [13] anytime_0.3.9 jpeg_0.1-8.1 rvest_1.0.0
## [16] haven_2.4.1 xfun_0.22 callr_3.7.0
## [19] crayon_1.4.1 jsonlite_1.7.2 gtable_0.3.0
## [22] DEoptimR_1.0-8 abind_1.4-5 scales_1.1.1
## [25] mvtnorm_1.1-1 DBI_1.1.1 Rcpp_1.0.6
## [28] isoband_0.2.4 htmlTable_2.1.0 foreign_0.8-81
## [31] htmlwidgets_1.5.3 httr_1.4.2 RColorBrewer_1.1-2
## [34] ellipsis_0.3.2 pkgconfig_2.0.3 farver_2.1.0
## [37] nnet_7.3-16 sass_0.3.1 dbplyr_2.1.1
## [40] utf8_1.2.1 tidyselect_1.1.1 labeling_0.4.2
## [43] rlang_0.4.11 munsell_0.5.0 cellranger_1.1.0
## [46] tools_4.0.2 cachem_1.0.4 cli_2.5.0
## [49] generics_0.1.0 evaluate_0.14 fastmap_1.1.0
## [52] yaml_2.2.1 goftest_1.2-2 processx_3.5.2
## [55] knitr_1.33 fs_1.5.0 robustbase_0.93-7
## [58] mvnfast_0.2.5.1 xml2_1.3.2 compiler_4.0.2
## [61] rstudioapi_0.13 png_0.1-7 reprex_2.0.0
## [64] clustermq_0.8.95.1 bslib_0.2.4 stringi_1.6.2
## [67] highr_0.9 ps_1.6.0 nloptr_1.2.2.2
## [70] vctrs_0.3.8 pillar_1.6.0 lifecycle_1.0.0
## [73] jquerylib_0.1.4 data.table_1.14.0 R6_2.5.0
## [76] latticeExtra_0.6-29 renv_0.13.2 gridExtra_2.3
## [79] Lmoments_1.3-1 codetools_0.2-18 boot_1.3-28
## [82] assertthat_0.2.1 rprojroot_2.0.2 withr_2.4.2
## [85] hms_1.1.0 grid_4.0.2 rpart_4.1-15
## [88] coda_0.19-4 minqa_1.2.4 snakecase_0.11.0
## [91] git2r_0.28.0 numDeriv_2016.8-1.1 base64enc_0.1-3